## Warning: package 'vegan' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
Now read in the data files we’ll be working with
macro.data <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/CrownHeight_Data.csv", header=T)
min.tree <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Macro_MinAges_HKY_Constrained_CON.tre")
#mean.tree <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_MeanAges_Constrained/Macro_MeanAges_HKY_Constrained_CON.tre")
#max.tree <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_MaxAges_Constrained_PF/Macro_MaxAges_HKY_Constrained_CON_fixed.tre")
#cp.min <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_CPMinAges_Constrained_PF/Macro_CP_MinAges_CON.tre")
#cp.mean <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_CPEstAges_Constrained_PF/Macro_CP_EstAges_CON.tre")
#cp.max <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_CPMaxAges_Constrained_PF/Macro_CP_MaxAges_CON.tre")
Choose the current tree we want to work with
tree <- min.tree
The C4 plant reconstruction data from Andrae
enviro.data <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Andrea_S1.csv", header=T)
head(enviro.data)
## Age Age_Error C4_recon_mean C4_recon_lower C4_recon_upper
## 1 1.0 0.002 59.2 36.7 81.6
## 2 2.0 0.001 36.6 11.8 61.3
## 3 2.5 0.008 36.0 11.2 60.8
## 4 2.8 0.005 35.4 10.5 60.2
## 5 2.8 0.013 21.8 0.0 48.1
## 6 3.0 0.005 38.8 14.3 63.3
And the dust flux data from Andrae
flux.data <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Aeolian_Flux.csv", header=T)
head(flux.data)
## Age A_Flux
## 1 0.05931646 108.3949
## 2 0.10874684 113.8622
## 3 0.20760760 118.6517
## 4 0.25703797 121.9088
## 5 0.35589873 109.4722
## 6 0.40532911 116.3859
Trim tree and data down to overlapping taxa
# extract the taxa that are in both the tree and
overlaps <- intersect(tree$tip.label, unique(macro.data$Taxon))
trim.tree <- drop.tip(tree, setdiff(tree$tip.label, overlaps))
trim.data <- filter(macro.data, Taxon %in% overlaps)
head(trim.data)
## ID TI Taxon Taxon_notes Status Higher_tax
## 1 AM_M3164 3 Aepyprymnus_rufescens Modern Potoroinae
## 2 AM_M9697 3 Aepyprymnus_rufescens Modern Potoroinae
## 3 AM_M14017 2 Aepyprymnus_rufescens Modern Potoroinae
## 4 AM_M2267 3 Aepyprymnus_rufescens Modern Potoroinae
## 5 AM_B6980 2 Aepyprymnus_rufescens Modern Potoroinae
## 6 NMV_P201155 2 Baringa_nelsonensis Fossil Macropodinae
## Macropodin_status Sex Side Assemblage Deposit_type Est_age PW H_HYPCD
## 1 non_macropodin <NA> R Modern <NA> 0.0 4.39 3.89
## 2 non_macropodin M R Modern <NA> 0.0 4.33 4.30
## 3 non_macropodin M R Modern <NA> 0.0 4.61 4.69
## 4 non_macropodin <NA> R Modern <NA> 0.0 4.28 4.36
## 5 non_macropodin M R Modern <NA> 0.0 3.85 4.14
## 6 macropodin <NA> R Nelson_Bay Non_cave 0.8 4.93 5.62
OR trim tree and data down to just Macropodinae
macros <- filter(macro.data, Higher_tax == "Macropodinae")
overlaps <- intersect(tree$tip.label, unique(macros$Taxon))
trim.tree <- drop.tip(tree, setdiff(tree$tip.label, overlaps))
trim.data <- filter(macros, Taxon %in% overlaps)
create a tibble to get the species means (if you haven’t done this already)
sp.means <- trim.data %>%
group_by(Taxon) %>%
summarise_at(vars(H_HYPCD), mean)
#write.csv(sp.means, row.names=FALSE, file="/PATH/CrownHeight_Macropodinae_spMEANS.csv") # uncomment to write the file
Or you can just read in the file I’ve already prepared for the Macropodinae:
sp.means <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Couzens&Prideaux2018_dryad_package/data/CrownHeight_Macropodinae_spMEANS.csv", header=T)
head(sp.means)
## Taxon H_HYPCD
## 1 Baringa_nelsonensis 7.461111
## 2 Bohra_illuminata 6.280000
## 3 Congruus_congruus 6.865000
## 4 Dendrolagus_dorianus 4.010000
## 5 Dendrolagus_lumholtzi 3.953571
## 6 Dendrolagus_matschiei 4.045000
macro.means <- sp.means$H_HYPCD; names(macro.means) <- sp.means$Taxon
species.means <- as.data.frame(sp.means$H_HYPCD); rownames(species.means) <- sp.means$Taxon
If you’re just working with the Macropodinae, make sure to trim the tree down
macros <- filter(macro.data, Higher_tax == "Macropodinae")
overlaps <- intersect(tree$tip.label, unique(macros$Taxon))
trim.tree <- drop.tip(tree, setdiff(tree$tip.label, overlaps))
We were only given the mean and confidence intervals for the grass data, so let’s make a function to extrapolate data from those mean and confidence intervals
time.estimates <- function (some.data, reps){
time.col <- NULL; recon.col <- NULL
for(j in 1:nrow(enviro.data)) {
time.sd <- sd(c(enviro.data$Age[j], (enviro.data$Age[j] + enviro.data$Age_Error[j]), (enviro.data$Age[j] - enviro.data$Age_Error[j])))
time.samples <- rnorm(reps, enviro.data$Age[j], time.sd)
#time.samples <- runif(reps, min=(enviro.data$Age[j] - enviro.data$Age_Error[j]), max=(enviro.data$Age[j] + enviro.data$Age_Error[j]))
time.col <- rbind(time.col, as.data.frame(time.samples))
recon.sd <- sd(c(enviro.data$C4_recon_mean[j], enviro.data$C4_recon_lower[j], enviro.data$C4_recon_upper[j]))
recon.samples <- rnorm(reps, enviro.data$C4_recon_mean[j], recon.sd/2)
#recon.samples <- runif(reps, min=enviro.data$C4_recon_lower[j], max=enviro.data$C4_recon_upper[j])
recon.col <- rbind(recon.col, as.data.frame(recon.samples))
}
time.est.samples <- cbind(time.col, recon.col)
time.est.samples[time.est.samples<0] <- 0
time.est.samples[time.est.samples>100] <- 100
time.est.samples <- time.est.samples[order(time.est.samples$time.samples),]
return(time.est.samples)
}
We’ll actually reconstruct those data now:
enviro <- time.estimates(enviro.data, 100)
plot(enviro)
Let’s quickly visualize the data in a few different ways to get an idea of what’s going on
# first using Phenogram from Phytools
phenogram(trim.tree, macro.means)
#plotTree.wBars(trim.tree, macro.means, args.plotTree=list(border=F, fsize=0.5)) # type="fan"
plotTree.barplot(trim.tree, macro.means, args.barplot=list(beside=TRUE, border=F))
dotTree(trim.tree, macro.means, ftype="i")
Correlative models like the environmental models in RPANDA will be sensitive to the amount of smoothing to the trend line of the input data (see Clavel & Morlon, PNAS). To address this, we’ll create a function that searches for the optimum smoothness of the trend by fitting a set of values.
best.smoothing <- function (phy, trait.data, time.data=InfTemp, degrees=c(0,10,20,30,40,50), model="EnvExp", cores=6) {
res.list <- mclapply(1:length(degrees), function(x) {
fit_t_env(phy, trait.data, env_data=time.data, df=degrees[x], scale=F, plot=T, model=model)}, mc.cores = cores)
for(i in 1:length(res.list)){res.list[[i]]$df <- degrees[i]}
res.values <- unlist(lapply(res.list, function(x) x$aicc)) # make a vector of the values, so we can get the index number of the best
best.res <- res.list[[which.min(res.values)]]
plot(best.res, main=paste(model, "; AICc = ", round(best.res$aicc,2)), sub=paste("sigma = ",round(best.res$param[1],2), " beta = ",round(best.res$param[2],2), " df = ", best.res$df), col="red")
return(list(all.results=res.list, best.result=best.res, best.df=best.res$df))
}
We can have a look at what this smoothing actually does to our data. We can come back to look at these once we get the optimum fits for our models.
#grass.spline0 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=0)
#grass.spline10 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=10)
#grass.spline20 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=20)
grass.spline30 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=30)
grass.spline40 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=40)
grass.spline50 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=50)
plot(enviro, main="C4 Grass Reconstruction Through Time", xlab="Millions of Years Ago", ylab="Percent C4")
#lines(grass.spline0, col="red", lwd=4)
#lines(grass.spline10, col="green", lwd=4)
#lines(grass.spline20, col="blue", lwd=4)
lines(grass.spline30, col="yellow", lwd=4)
lines(grass.spline40, col="violet", lwd=4)
lines(grass.spline50, col="gold", lwd=4)
data(InfTemp)
env.spline0 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=0)
env.spline10 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=10)
env.spline20 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=20)
env.spline30 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=30)
env.spline40 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=40)
env.spline50 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=50)
plot(InfTemp, main="Paleotemperature Through Time")
lines(env.spline0, col="red")
lines(env.spline10, col="green")
lines(env.spline20, col="blue")
lines(env.spline30, col="yellow")
lines(env.spline40, col="magenta")
lines(env.spline50, col="cyan")
flux.spline0 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=0)
flux.spline10 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=10)
flux.spline20 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=20)
flux.spline30 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=30)
flux.spline40 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=40)
flux.spline50 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=50)
plot(flux.data, main="Aeolian Dust Flux Through Time", xlab="Millions of Years Ago", ylab="Aeolian Dust Flux")
lines(flux.spline0, col="red", lwd=4)
lines(flux.spline10, col="green", lwd=4)
lines(flux.spline20, col="blue", lwd=4)
lines(flux.spline30, col="yellow", lwd=4)
lines(flux.spline40, col="magenta", lwd=4)
lines(flux.spline50, col="cyan", lwd=4)
Start with the environmental model of paleotemperature. You can designate the number of cores and the amount of smoothing
# linear model first
ENVexp <- best.smoothing(trim.tree, macro.means, time.data=InfTemp, degrees=c(10,20,30,40,50), model="EnvExp", cores=5)
# exponential model next
ENVlin <- best.smoothing(trim.tree, macro.means, time.data=InfTemp, degrees=c(10,20,30,40,50), model="EnvLin", cores=5)
Next up the grass model, using C4 reconstructions.
GRASSexp <- best.smoothing(trim.tree, macro.means, time.data=enviro, degrees=c(30,40,50), model="EnvExp", cores=3)
GRASSlin <- best.smoothing(trim.tree, macro.means, time.data=enviro, degrees=c(30,40,50), model="EnvLin", cores=3)
And finally the flux models, using aeolian dust measurements.
FLUXexp <- best.smoothing(trim.tree, macro.means, time.data=flux.data, degrees=c(10,20,30,40,50), model="EnvExp", cores=5)
FLUXlin <- best.smoothing(trim.tree, macro.means, time.data=flux.data, degrees=c(10,20,30,40,50), model="EnvLin", cores=5)
Lastly, for comparison, run a few standard models. These are Brownian Motion, Brownian Motion with a Trend, and Early Burst.
BM_res <- fitContinuous(trim.tree, species.means, model="BM")
trend_res <- fitContinuous(trim.tree, species.means, model="trend")
EB_res <- fitContinuous(trim.tree, species.means, model="EB")
Compare the models with AICc, and check differences across the trees
model_FIT <- c(ENVexp$best.result$aicc, ENVlin$best.result$aicc,
GRASSexp$best.result$aicc, FLUXexp$best.result$aicc, FLUXlin$best.result$aicc, #GRASSlin$best.result$aicc,
BM_res$opt$aicc, trend_res$opt$aicc, EB_res$opt$aicc); names(model_FIT) <- c("ENVexp", "ENVlin", "GRASSexp", "FLUXexp", "FLUXlin", "BM", "Trend", "EB")
aic.w(model_FIT)
## ENVexp ENVlin GRASSexp FLUXexp FLUXlin BM Trend EB
## 0 0 1 0 0 0 0 0
fit.aic <- as.data.frame(as.vector(aic.w(model_FIT))); fit.aic$model <- names(model_FIT); colnames(fit.aic) <- c("aiccw", "model"); fit.aic$age <- "Min_Ages"
Quickly collapse models from the same data
fit.aic$model.type <- c("ENV", "ENV", "GRASS", "FLUX", "FLUX", "BM", "Trend", "EB")
fit.aic$model.type <- factor(fit.aic$model.type, levels=c("ENV", "FLUX", "GRASS", "BM", "EB", "Trend"))
Then plot the model fits as AICc weights
library(wesanderson)
(ggplot(fit.aic)
+ geom_bar(aes(y=aiccw, x=age, fill=model.type), stat="identity")
#+ theme(axis.text.x=element_text(angle=25, hjust=1), panel.background=element_blank(), legend.position="bottom")
+ theme_classic()
+ scale_fill_manual( values=wes_palette("Zissou1", 6, "continuous")))
We can further look at our data by plottin Disparity Through Time to our data Then we can simulate data under the preferred model (GRASSexp or GRASSlin) to see if our data fits the expectation.
simGRASSexp <- sim_t_env(trim.tree, GRASSexp$best.result$param, model="EnvExp", env_data=enviro,
root.value=GRASSexp$best.result$root, plot=T)
simGRASSlin <- sim_t_env(trim.tree, GRASSlin$best.result$param, model="EnvLin", env_data=enviro,
root.value=GRASSlin$best.result$root, plot=T)
And again run DTT on our simulated data to see if the patterns hold up.
dttGRASSexp <- dtt(trim.tree, simGRASSexp, nsim=100, plot=T)
dttGRASSlin <- dtt(trim.tree, simGRASSlin, nsim=100, plot=T)
# obviously you'll increase the nsim to >1000.
Ok, now that we’ve fit the models to a given tree, we want to fit the models to lots of trees of different ages, shapes, etc. Let’s read in a file I’ve made of trees from the whole span of dating analyses.
tree.span <- read.tree("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Tree_Span.tre")
tree.span
## 79 phylogenetic trees
Now we’ll run a loop across all of these trees, to fit the models to each one. It may take a little while.
# MAKE THIS EVAL=TRUE IF YOU WANT TO DO THIS BIT FOR REAL
all.aics <- NULL; all.results <- NULL
for (k in 1:length(tree.span)){
beginning <- Sys.time()
int.results <- NULL
# Fit a number of models to the data (ENV, GRASS, BM, EB, Trend, Drift)
ENVexp <- best.smoothing(tree.span[[k]], macro.means, time.data=InfTemp, degrees=c(10,20,30,40,50), model="EnvExp", cores=5); int.results[["ENVexp"]] <- ENVexp$best.result;
ENVlin <- best.smoothing(tree.span[[k]], macro.means, time.data=InfTemp, degrees=c(10,20,30,40,50), model="EnvLin", cores=5); int.results[["ENVlin"]] <- ENVlin$best.result;
GRASSexp <- best.smoothing(tree.span[[k]], macro.means, time.data=testo, degrees=c(30,50), model="EnvExp", cores=3); int.results[["GRASSexp"]] <- GRASSexp$best.result;
#GRASSlin <- best.smoothing(trim.tree, macro.means, time.data=testo, degrees=c(30,40,50), model="EnvLin", cores=3)
FLUXexp <- best.smoothing(tree.span[[k]], macro.means, time.data=flux.data, degrees=c(10,20,30,40,50), model="EnvExp", cores=5); int.results[["FLUXexp"]] <- FLUXexp$best.result;
FLUXlin <- best.smoothing(tree.span[[k]], macro.means, time.data=flux.data, degrees=c(10,20,30,40,50), model="EnvLin", cores=5); int.results[["FLUXlin"]] <- FLUXlin$best.result;
BM_res <- fitContinuous(tree.span[[k]], species.means, model="BM"); int.results[["BM"]] <- BM_res
trend_res <- fitContinuous(tree.span[[k]], species.means, model="trend"); int.results[["Trend"]] <- trend_res
EB_res <- fitContinuous(tree.span[[k]], species.means, model="EB"); int.results[["EB"]] <- EB_res
curr_tree_FIT <- c(ENVexp$best.result$aicc, ENVlin$best.result$aicc,
GRASSexp$best.result$aicc, FLUXexp$best.result$aicc, FLUXlin$best.result$aicc, #GRASSlin$best.result$aicc,
BM_res$opt$aicc, trend_res$opt$aicc, EB_res$opt$aicc); names(curr_tree_FIT) <- c("ENVexp", "ENVlin", "GRASSexp", "FLUXexp", "FLUXlin", "BM", "Trend", "EB")
curr.aic <- as.data.frame(as.vector(aic.w(curr_tree_FIT)));
curr.aic$model <- names(curr_tree_FIT); colnames(curr.aic) <- c("aiccw", "model");
curr.aic$age <- round(max(nodeHeights(tree.span[[k]])), 3)
curr.aic$tree <- k
all.results[[k]] <- int.results
curr.aic$model.type <- c("ENV", "ENV", "GRASS", "FLUX", "FLUX", "BM", "Trend", "EB")
# ENVexp <- ENVexp$best.result; ENVlin <- ENVlin$best.result; GRASSexp <- GRASSexp$best.result; FLUXexp <- FLUXexp$best.result; FLUXlin <- FLUXlin$best.result
# curr.aic[which(curr.aic$aiccw == max(curr.aic$aiccw)),"model"]
all.aics <- rbind.data.frame(all.aics, curr.aic);
end <- Sys.time()
duration <- format(end-beginning)
print(paste("Computation time :", duration))
}
Save the file externally:
# MAKE THIS EVAL=TRUE IF YOU WANT TO DO THIS BIT FOR REAL
all.aics$age <- factor(all.aics$age, levels=c("Min_Ages", "Mean_Ages", "Max_Ages"))
all.aics$model.type <- factor(all.aics$model.type, levels=c("ENV", "FLUX", "GRASS", "BM", "EB", "Trend"))
#saveRDS(all.aics, file="/Users/Ian/Desktop/Macropod_Dating/Model_Fitting_AICCs.RDS")
Or skip the work and read in the file instead:
all.aics <- readRDS("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Model_Fitting.RDS")
all.aics$age <- as.factor(all.aics$age)
(ggplot(all.aics)
+ geom_bar(aes(y=aiccw, x=age, fill=model.type), stat="identity")
+ theme(axis.text.x=element_text(angle=90, hjust=1), panel.background=element_blank(), legend.position="bottom")
+ scale_fill_manual( values=wes_palette("Zissou1", 6, "continuous")))
Make a quick function to get the DEPTH of a node (from present), instead of the HEIGHT (from root)
MRCA.depth <- function(phy){max(nodeHeights(phy)) - findMRCA(phy, tips=c("Setonix_brachyurus", "Wallabia_bicolor"), type="height")}
Prepare trees for comparison of ages
age.cp.min <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/Macro_CPMinAges_Constrained_PF/Macro_CP_MinAges_NEWICK.trees"); age.cp.min <- age.cp.min[(length(age.cp.min)-200):length(age.cp.min)]
cp.min <- as.data.frame(unlist(lapply(age.cp.min, MRCA.depth))); cp.min$tree <- "cp.min"; colnames(cp.min) <- c("age", "tree")
age.cp.mean <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/Macro_CPEstAges_Constrained_PF/Macro_CP_EstAges_NEWICK.trees"); age.cp.mean <- age.cp.mean[(length(age.cp.mean)-200):length(age.cp.mean)]
cp.est <- as.data.frame(unlist(lapply(age.cp.mean, MRCA.depth))); cp.est$tree <- "cp.est"; colnames(cp.est) <- c("age", "tree")
age.cp.max <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/Macro_CPMaxAges_Constrained_PF/Macro_CP_MaxAges_NEWICK.trees"); age.cp.max <- age.cp.max[(length(age.cp.max)-200):length(age.cp.max)]
cp.max <- as.data.frame(unlist(lapply(age.cp.max, MRCA.depth))); cp.max$tree <- "cp.max"; colnames(cp.max) <- c("age", "tree")
age.range.strict <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/Macro_AgesRanges_LinksCorrected_STRICT/Macro_AgesRanges_LinksCorrected_Newick.trees"); age.range.strict <- age.range.strict[(length(age.range.strict)-200):length(age.range.strict)]
range.strict <- as.data.frame(unlist(lapply(age.range.strict, MRCA.depth))); range.strict$tree <- "range.strict"; colnames(range.strict) <- c("age", "tree")
age.all <- rbind(cp.min, cp.est, cp.max, range.strict)
age.all$tree <- factor(age.all$tree, levels=c("cp.max", "range.strict", "cp.est", "cp.min"))
(ggplot(age.all, aes(x=age, fill=tree))
+ geom_density(alpha=0.75, adjust=1.5)
+ theme(axis.text.x=element_text(angle=0, hjust=1), panel.background=element_blank(), legend.position="bottom")
+ scale_fill_manual(values=wes_palette("Zissou1", type="continuous", 4))
+ scale_x_reverse(lim=c(19,9)))
prior.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Prior_Only_NEWICK.trees"); prior.trees <- prior.trees[(length(prior.trees)-200):length(prior.trees)]
post.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Macro_CP_EstAges_NEWICK.trees"); post.trees <- post.trees[(length(post.trees)-200):length(post.trees)]
Choose which tips you want information for:
fossil_taxa <- c("Baringa_nelsonensis", "Bohra_illuminata", "Congruus_congruus",
"Dorcopsoides_fossilis", "Ganguroo_bilamina","Hadronomas_puckridgi",
"Kurrabi_mahoneyi","Macropus_pavana","Ngamaroo_archeri",
"Bulungamaya_delicata","Prionotemnus_palankarinnicus",
"Procoptodon_goliah","Protemnodon_anak","Simosthenurus_occidentalis",
"Sthenurus_andersoni","Troposodon_minor","Wanburoo_hilarus")
Get the node numbers of the tips
nodes <- sapply(fossil_taxa,function(x,y) which(y==x),y=tree$tip.label)
Then get the edge lengths for those nodes
edge.lengths <- setNames(tree$edge.length[sapply(nodes,
function(x,y) which(y==x),y=tree$edge[,2])],names(nodes))
The faster way is to make a function to do this:
get_terminal_branchlengths <- function(phy, tipnames){
## Get the node numbers of the tips
nodes <- sapply(tipnames,function(x,y) which(y==x),y=phy$tip.label)
## Then get the edge lengths for those nodes
edge.lengths <- setNames(phy$edge.length[sapply(nodes,
function(x,y) which(y==x),y=phy$edge[,2])],names(nodes))
return(edge.lengths)
}
Now that we’ve got the tips and branch lengths, we can compare the posterior to the prior
BFSA <- function(prior.phy, posterior.phy, tips){
post <- lapply(posterior.phy, get_terminal_branchlengths, tipnames=tips); names(post) <- NULL; post <- unlist(post)
prior <- lapply(prior.phy, get_terminal_branchlengths, tipnames=tips); names(prior)<- NULL; prior <- unlist(prior)
BFs <- NULL
for (j in 1:length(tips)){
curr.tip <- subset(post, names(post)==tips[j]);
probSA <- sum(curr.tip<=0); probTIP <- length(curr.tip)-probSA;
curr.tip <- subset(prior, names(prior)==tips[j]);
priorSA <- sum(curr.tip<=0); priorTIP <- length(curr.tip)-priorSA;
curr.BF <- log((probSA * priorTIP) / (probTIP * priorSA))
if(is.na(curr.BF)){curr.BF <- 0}
#curr.BF <- log(probSA/(length(curr.tip)-probSA))
names(curr.BF) <- tips[j]; curr.BF <- round(curr.BF, 2)
BFs <- append(BFs, curr.BF)
}
return(BFs)
}
Orient the data appropriately
macro_BFs <- BFSA(prior.trees, post.trees, tips=fossil_taxa)
macro_BFs <- as.data.frame(macro_BFs) # make the vector a data frame
macro_BFs[which(macro_BFs$macro_BFs > 5),] <- 5 # change any really big (INF) numbers to 5
macro_BFs[which(macro_BFs$macro_BFs < -5),] <- -5 # change any really small (-INF) numbers to -5
macro_BFs$taxa <- rownames(macro_BFs); # create column with the taxon names
macro_BFs <- macro_BFs[order(macro_BFs$macro_BFs),] # reorder by BF values
# set colors for plotting
macro_BFs$color <- "black";
macro_BFs[which(macro_BFs$macro_BFs > 1),]$color <- "#3d98d3"
macro_BFs[which(macro_BFs$macro_BFs < -1),]$color <- "#FF7175"
macro_BFs$taxa <- factor(macro_BFs$taxa, levels=c(macro_BFs$taxa)) # set factors for plotting (NOT NECESSARY)
Then plot the data
ggplot(macro_BFs, aes(x=taxa, y=macro_BFs, label=macro_BFs)) +
geom_ribbon(ymin=-1, ymax=+1) +
geom_point(stat='identity', size=8, color=macro_BFs$color) +
# geom_segment(aes(y = 0,
# x = taxa,
# yend = macro_BFs,
# xend = taxa),
# color = macro_BFs$color) +
geom_text(color="white", size=2) +
#labs(title="Bayes Factor Support", subtitle="for Fossil Taxa as Sampled Ancestors") +
#ylim(-5, 5) +
scale_y_continuous(name="log Bayes Factors", limits=c(-5,5), breaks=c(-5:5)) +
#theme(panel.background=element_blank()) +
geom_hline(yintercept=-1) +
geom_hline(yintercept=1) +
xlab("Fossil Taxa") +
#ylab("log Bayes Factors") +
theme_classic() +
coord_flip()
Plot the C4 and Flux data on a geological time scale
library(deeptime); library(gridExtra)
pp <- ggplot(enviro.data, aes(Age)) +
geom_ribbon(aes(ymin = C4_recon_lower, ymax = C4_recon_upper), fill = "pink") +
geom_line(aes(y = C4_recon_mean), color="red") + scale_x_reverse() + theme_classic() +
coord_cartesian(xlim = c(0, 10), ylim = c(0,80), expand = FALSE)
qq <- gggeo_scale(pp, dat="epochs")
rr <- ggplot(flux.data, aes(Age)) +
geom_ribbon(aes(ymin = A_Flux-35, ymax = A_Flux+35), fill = "light blue") +
geom_line(aes(y = A_Flux), color="blue") + scale_x_reverse() + theme_classic() +
coord_cartesian(xlim = c(0, 13), ylim = c(0,150), expand = FALSE)
ss <- gggeo_scale(rr, dat="epochs")
grid.arrange(qq, ss, nrow=1)
Create a function to pull the ages of each fossil taxon estimated
get.fossil.ages <- function(fossil.tips, trees){
tree.tables <- lapply(trees, print.tree)
fossil.tables <- lapply(1:length(tree.tables), function(x) {
subset(tree.tables[[x]], tree.tables[[x]]$label %in% fossil.tips)
})
fossil.ages <- lapply(1:length(fossil.tables), function(x) {
select(fossil.tables[[x]], label, time_bp)
})
final <- bind_rows(fossil.ages)
}
Need to create a function called “print.tree” that I borrowed some code from biogeoBEARS
Now pull out the info on those fossils
my.test <- get.fossil.ages(fossil.tips = fossil_taxa, trees = post.trees)
my.test$label <- factor(my.test$label, levels=c("Bulungamaya_delicata",
"Protemnodon_anak",
"Sthenurus_andersoni",
"Troposodon_minor",
"Baringa_nelsonensis",
"Prionotemnus_palankarinnicus",
"Congruus_congruus",
"Bohra_illuminata",
"Kurrabi_mahoneyi",
"Ngamaroo_archeri",
"Dorcopsoides_fossilis",
"Wanburoo_hilarus",
"Ganguroo_bilamina",
"Hadronomas_puckridgi",
"Simosthenurus_occidentalis",
"Macropus_pavana",
"Procoptodon_goliah"))
(ggplot(my.test, aes(x=time_bp, y=label, fill=..x..))
+ scale_fill_gradientn(colours=wes_palette("Zissou1"))
+ geom_density_ridges_gradient(scale=2)
+ scale_x_reverse()
+ theme_classic())
## Picking joint bandwidth of 2.28e-07